library(PoissonBinomial)
library(FNN)
library(GoFKernel)

# preliminaries
set.seed(2022)

# control constants
n <- 1000
whiteProps <- c(0.7, 0.8, 0.9) # proportion of the precinct voters that are White 
whiteShift <- -0.10            # Dem support probabilities overestimated by 10 percentage points for White voters
nonWhiteShift <- 0.10          # Dem support probabilities underestimated by 10 percentage points for Black voters

# list of the six data-generating processes 
dgps <- c('uniform', 'closeToZero', 'closeToOne', 'extremal', 'central', 'bimodal')

############################################
##            helper functions            ##
############################################

# logit swing function
logitSwing <- function(probs, D, lwr = 0, upr = 5e4, eps = 1e-8) {
  
  # compute logit swing
  while(upr - lwr > eps) {
    mid <- (lwr + upr)/2
    
    if(sum(1/(1 + (1-probs)/probs*mid)) < D)
      upr <- mid
    else
      lwr <- mid
  }

  return(1/(1 + (1-probs)/probs*mid))
}

# data generation function
dataGenerate <- function(n, dist) {
  
  # generate the data
  if(dist == 'uniform')
    probs <- runif(n)
  else if(dist == 'closeToZero')
    probs <- rbeta(n, 0.1, 0.5)
  else if(dist == 'closeToOne')
    probs <- rbeta(n, 0.5, 0.1)
  else if(dist == 'extremal')
    probs <- sample(c(rbeta(n/2, 0.1, 3), rbeta(n/2, 3, 0.1)))
  else if(dist == 'central')
    probs <- rbeta(n, 3, 3)
  else if(dist == 'bimodal')
    probs <- sample(c(rbeta(n/2, 3, 10), rbeta(n/2, 10, 3)))
  
  return(probs)
}

############################################
##             calling script             ##
############################################

# iterate through every combination of white probabilities and data-generating processes
results <- lapply(whiteProps, FUN = function(w) {
  sapply(dgps, FUN = function(dgp) {

      # generate the initial scores
      estScores <- dataGenerate(n, dgp)
      
      # get the true scores
      whiteIndicators <- sample(c(rep(1, n*w), rep(0, n - n*w)))
      trueScores <- rep(NA, n)
      trueScores[whiteIndicators == 1] <- logitSwing(estScores[whiteIndicators == 1], 
                                                     (mean(estScores[whiteIndicators == 1]) + whiteShift)*sum(whiteIndicators))
      trueScores[whiteIndicators == 0] <- logitSwing(estScores[whiteIndicators == 0], 
                                                     (mean(estScores[whiteIndicators == 0]) + nonWhiteShift)*sum(1 - whiteIndicators))
      trueScores <- ifelse(trueScores < 0, 0, ifelse(trueScores > 1, 1, trueScores))
      
      # error checking
      if(mean(estScores[whiteIndicators == 1]) + whiteShift < 0 || mean(estScores[whiteIndicators == 1]) + whiteShift > 1 ||
         mean(estScores[whiteIndicators == 0]) + nonWhiteShift < 0 || mean(estScores[whiteIndicators == 0]) + nonWhiteShift > 1)
        browser()

      # compute the aggregate logit swing
      D <- sum(outcomes <- as.numeric(runif(n) < trueScores))
      updatedScores <- logitSwing(estScores, D)
      
      # report the change in correlation
      sapply(list(1, 0, c(0, 1)), FUN = function(w) {
        cor.prior <- cor(trueScores[whiteIndicators %in% w], estScores[whiteIndicators %in% w])
        cor.posterior <-  cor(trueScores[whiteIndicators %in% w], updatedScores[whiteIndicators %in% w])
        (cor.posterior - cor.prior)/cor.prior
      })
    })
})

# print the results
res <- t(do.call(rbind, results))
Table2 <- round(res, 3)
colnames(Table2) <- c('W (70%)', 'B (30%)', 'All', 'W (80%)', 'B (20%)', 'All', 'W (90%)', 'B (10%)', 'All') 
print(Table2)
